# Load necessary libraries.
# requireNamespace() only checks whether ggplot2 is installed (it does not
# attach it), so the package is installed when missing and then attached
# exactly once by library(), which errors loudly if loading still fails.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Generate sample data
set.seed(42) # For reproducibility
x <- rnorm(100, mean = 50, sd = 10) # Sample x-values
y <- x * 1.5 + rnorm(100, sd = 5) # Corresponding y-values with some added noise

# Create a data frame from the vectors (columns are named x and y)
data <- data.frame(x, y)

# Generate the scatter plot
ggplot(data, aes(x = x, y = y)) +
  # Points with a color gradient based on the x-value
  geom_point(aes(color = x), size = 3, alpha = 0.6) +
  theme_minimal() + # Clean theme
  labs(
    title = "Scatter Plot of X vs Y",
    x = "X Axis Label",
    y = "Y Axis Label"
  ) +
  scale_color_gradient(low = "blue", high = "red") + # Gradient from blue to red
  theme(plot.title = element_text(hjust = 0.5)) # Center the plot title
Scatter plot
Concept- Scatter plot
Scatter plots are a type of data visualization used to display the values of two variables for a set of data. The variables are typically continuous, although scatter plots can be used to visualize categorical variables as well. The data are displayed as a collection of points, with the value of one variable determining each point's position on the horizontal axis and the value of the other variable determining its position on the vertical axis. Scatter plots are used to observe relationships between variables.
Properties:
Axes: Represent the variables being compared. Typically, the independent variable or predictor is placed on the x-axis, and the dependent variable or response is placed on the y-axis.
Points: Each point on the scatter plot represents an individual data point. The position is determined by the values of the two variables.
Trend: The overall direction of the data points in the plot can suggest a relationship (e.g., positive, negative, or no correlation).
Clusters: Groupings or clusters of points may indicate that those data points have something in common.
Outliers: Points that lie significantly outside the general distribution of data may be highlighted as they could indicate anomalies or special cases.
# Attach ggplot2 so its plotting functions are available
library(ggplot2)
# mtcars is a built-in dataset, so it does not need to be loaded explicitly
# Begin with the data/aesthetics layer, then chain each further layer with `+`
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band; the constant
  # color aesthetic gives the line its own legend entry
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  ) +
  # Coordinate layer: restrict the visible region of the plot
  coord_cartesian(
    xlim = c(1.5, 5.5),
    ylim = c(10, 35)
  ) +
  # Scale layers: choose where the axis ticks fall
  scale_x_continuous(breaks = seq(2, 5, by = 1)) +
  scale_y_continuous(breaks = seq(10, 35, by = 5)) +
  # Color the regression line and relabel its legend key
  scale_color_manual(values = "red", labels = "Linear Model") +
  # Title, axis labels and legend title
  labs(
    title = "Car Weight vs. Mileage",
    x = "Weight (1000 lbs)",
    y = "Miles per Gallon",
    color = "Line Type"
  ) +
  # Theme layer: black-and-white base theme
  theme_bw() +
  # Theme tweaks: centered title, bold axis titles
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold")
  )
Scatter plot - Layer by Layer
1. Creating data and aesthetics Layer.
# Attach ggplot2, then create the base plot: data plus aesthetic mappings only.
# No geometry has been added yet, so no data points are drawn.
library(ggplot2)
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg)
)
2. Adding the Geometric layer (geom_point)
# Data + aesthetics layer, followed by the geometry layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point()
3. Adding Statistical Layer - Regression Line
# Data + aesthetics layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band; the constant
  # color aesthetic gives the line its own legend entry
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  )
4. Adding the Coordinate Layer
# Data + aesthetics layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  ) +
  # Coordinate layer: restrict the visible region of the plot
  coord_cartesian(
    xlim = c(1.5, 5.5),
    ylim = c(10, 35)
  )
5. Adding the scale layer
# Data + aesthetics layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  ) +
  # Coordinate layer: restrict the visible region of the plot
  coord_cartesian(
    xlim = c(1.5, 5.5),
    ylim = c(10, 35)
  ) +
  # Scale layers: choose where the axis ticks fall
  scale_x_continuous(breaks = seq(2, 5, by = 1)) +
  scale_y_continuous(breaks = seq(10, 35, by = 2))
6. Adding color to Regression Line
# Data + aesthetics layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band; the constant
  # color aesthetic is what the manual color scale below applies to
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  ) +
  # Coordinate layer: restrict the visible region of the plot
  coord_cartesian(
    xlim = c(1, 6),
    ylim = c(5, 35)
  ) +
  # Scale layers: choose where the axis ticks fall
  scale_x_continuous(breaks = seq(1, 6, by = 1)) +
  scale_y_continuous(breaks = seq(5, 35, by = 2)) +
  # Color the regression line and relabel its legend key
  scale_color_manual(values = "blue", labels = "Linear Model")
7. Adding labels and Title
# Data + aesthetics layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  ) +
  # Coordinate layer: restrict the visible region of the plot
  coord_cartesian(
    xlim = c(1, 6),
    ylim = c(5, 35)
  ) +
  # Scale layers: choose where the axis ticks fall
  scale_x_continuous(breaks = seq(1, 6, by = 1)) +
  scale_y_continuous(breaks = seq(5, 35, by = 2)) +
  # Color the regression line and relabel its legend key
  scale_color_manual(values = "blue", labels = "Linear Model") +
  # Title, axis labels and legend title
  labs(
    title = "Car Weight vs. Mileage",
    x = "Weight (1000 lbs)",
    y = "Miles per Gallon",
    color = "Line Type"
  )
8. Adding Themes
# Data + aesthetics layer
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  # Geometry layer: draw one point per row
  geom_point() +
  # Statistics layer: linear-model fit with its confidence band
  stat_smooth(
    mapping = aes(color = "Regression Line"),
    method = "lm",
    se = TRUE,
    linewidth = 1
  ) +
  # Coordinate layer: restrict the visible region of the plot
  coord_cartesian(
    xlim = c(1, 6),
    ylim = c(5, 35)
  ) +
  # Scale layers: choose where the axis ticks fall
  scale_x_continuous(breaks = seq(1, 6, by = 1)) +
  scale_y_continuous(breaks = seq(5, 35, by = 2)) +
  # Color the regression line and relabel its legend key
  scale_color_manual(values = "blue", labels = "Linear Model") +
  # Title, axis labels and legend title
  labs(
    title = "Car Weight vs. Mileage",
    x = "Weight (1000 lbs)",
    y = "Miles per Gallon",
    color = "Line Type"
  ) +
  # Theme layer: black-and-white base theme
  theme_bw() +
  # Theme tweaks: centered title, bold axis titles
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title = element_text(face = "bold")
  )